In [0]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
In [0]:
vhcl=pd.read_csv('vehicle_pca.csv')
In [0]:
vhcl.head()
Out[0]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio class
0 95 48.0 83.0 178.0 72.0 10 162.0 42.0 20.0 159 176.0 379.0 184.0 70.0 6.0 16.0 187.0 197 van
1 91 41.0 84.0 141.0 57.0 9 149.0 45.0 19.0 143 170.0 330.0 158.0 72.0 9.0 14.0 189.0 199 van
2 104 50.0 106.0 209.0 66.0 10 207.0 32.0 23.0 158 223.0 635.0 220.0 73.0 14.0 9.0 188.0 196 car
3 93 41.0 82.0 159.0 63.0 9 144.0 46.0 19.0 143 160.0 309.0 127.0 63.0 6.0 10.0 199.0 207 van
4 85 44.0 70.0 205.0 103.0 52 149.0 45.0 19.0 144 241.0 325.0 188.0 127.0 9.0 11.0 180.0 183 bus
In [0]:
vhcl[vhcl.notnull()]
Out[0]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio class
0 95 48.0 83.0 178.0 72.0 10 162.0 42.0 20.0 159 176.0 379.0 184.0 70.0 6.0 16.0 187.0 197 van
1 91 41.0 84.0 141.0 57.0 9 149.0 45.0 19.0 143 170.0 330.0 158.0 72.0 9.0 14.0 189.0 199 van
2 104 50.0 106.0 209.0 66.0 10 207.0 32.0 23.0 158 223.0 635.0 220.0 73.0 14.0 9.0 188.0 196 car
3 93 41.0 82.0 159.0 63.0 9 144.0 46.0 19.0 143 160.0 309.0 127.0 63.0 6.0 10.0 199.0 207 van
4 85 44.0 70.0 205.0 103.0 52 149.0 45.0 19.0 144 241.0 325.0 188.0 127.0 9.0 11.0 180.0 183 bus
5 107 NaN 106.0 172.0 50.0 6 255.0 26.0 28.0 169 280.0 957.0 264.0 85.0 5.0 9.0 181.0 183 bus
6 97 43.0 73.0 173.0 65.0 6 153.0 42.0 19.0 143 176.0 361.0 172.0 66.0 13.0 1.0 200.0 204 bus
7 90 43.0 66.0 157.0 65.0 9 137.0 48.0 18.0 146 162.0 281.0 164.0 67.0 3.0 3.0 193.0 202 van
8 86 34.0 62.0 140.0 61.0 7 122.0 54.0 17.0 127 141.0 223.0 112.0 64.0 2.0 14.0 200.0 208 van
9 93 44.0 98.0 NaN 62.0 11 183.0 36.0 22.0 146 202.0 505.0 152.0 64.0 4.0 14.0 195.0 204 car
10 86 36.0 70.0 143.0 61.0 9 133.0 50.0 18.0 130 153.0 266.0 127.0 66.0 2.0 10.0 194.0 202 van
11 90 34.0 66.0 136.0 55.0 6 123.0 54.0 17.0 118 148.0 224.0 118.0 65.0 5.0 26.0 196.0 202 car
12 88 46.0 74.0 171.0 68.0 6 152.0 43.0 19.0 148 180.0 349.0 192.0 71.0 5.0 11.0 189.0 195 bus
13 89 42.0 85.0 144.0 58.0 10 152.0 44.0 19.0 144 173.0 345.0 161.0 72.0 8.0 13.0 187.0 197 van
14 94 49.0 79.0 203.0 71.0 5 174.0 37.0 21.0 154 196.0 465.0 206.0 71.0 6.0 2.0 197.0 199 bus
15 96 55.0 103.0 201.0 65.0 9 204.0 32.0 23.0 166 227.0 624.0 246.0 74.0 6.0 2.0 186.0 194 car
16 89 36.0 51.0 109.0 52.0 6 118.0 57.0 17.0 129 137.0 206.0 125.0 80.0 2.0 14.0 181.0 185 van
17 99 41.0 77.0 197.0 69.0 6 177.0 36.0 21.0 139 202.0 485.0 151.0 72.0 4.0 10.0 198.0 199 bus
18 104 54.0 100.0 186.0 61.0 10 216.0 31.0 24.0 173 225.0 686.0 220.0 74.0 5.0 11.0 185.0 195 car
19 101 56.0 100.0 215.0 NaN 10 208.0 32.0 24.0 169 227.0 651.0 223.0 74.0 6.0 5.0 186.0 193 car
20 84 47.0 75.0 153.0 64.0 6 154.0 43.0 19.0 145 175.0 354.0 184.0 75.0 0.0 3.0 185.0 192 bus
21 84 37.0 53.0 121.0 59.0 5 123.0 55.0 17.0 125 141.0 221.0 133.0 82.0 7.0 1.0 179.0 183 van
22 94 43.0 64.0 173.0 69.0 7 150.0 43.0 19.0 142 169.0 344.0 177.0 68.0 9.0 1.0 199.0 206 bus
23 87 39.0 70.0 148.0 61.0 7 143.0 46.0 18.0 136 164.0 307.0 141.0 69.0 1.0 2.0 192.0 199 bus
24 99 53.0 105.0 219.0 66.0 11 204.0 32.0 23.0 165 221.0 623.0 224.0 68.0 0.0 6.0 191.0 201 car
25 85 45.0 80.0 154.0 64.0 9 147.0 45.0 19.0 148 169.0 324.0 174.0 71.0 1.0 4.0 188.0 199 van
26 83 36.0 54.0 119.0 57.0 6 128.0 53.0 18.0 125 143.0 238.0 139.0 82.0 6.0 3.0 179.0 183 car
27 107 54.0 98.0 203.0 65.0 11 218.0 31.0 25.0 167 229.0 696.0 216.0 72.0 1.0 28.0 187.0 199 car
28 102 45.0 85.0 193.0 64.0 6 192.0 33.0 22.0 146 217.0 570.0 163.0 76.0 6.0 7.0 195.0 193 bus
29 80 38.0 63.0 129.0 55.0 7 146.0 46.0 19.0 130 168.0 314.0 158.0 83.0 9.0 20.0 180.0 185 car
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
816 90 48.0 85.0 157.0 64.0 11 161.0 43.0 20.0 167 175.0 375.0 186.0 74.0 3.0 16.0 185.0 195 van
817 104 53.0 108.0 204.0 64.0 11 220.0 31.0 25.0 172 226.0 707.0 203.0 71.0 14.0 30.0 189.0 203 car
818 95 43.0 96.0 202.0 65.0 10 189.0 35.0 22.0 143 217.0 534.0 166.0 71.0 6.0 27.0 190.0 197 car
819 93 42.0 98.0 192.0 63.0 9 185.0 36.0 22.0 138 206.0 508.0 173.0 70.0 10.0 21.0 189.0 197 car
820 87 38.0 71.0 123.0 53.0 8 137.0 49.0 18.0 127 158.0 277.0 145.0 75.0 0.0 9.0 181.0 186 car
821 104 56.0 96.0 231.0 74.0 11 220.0 30.0 25.0 172 223.0 713.0 218.0 73.0 6.0 16.0 186.0 195 car
822 95 41.0 82.0 170.0 65.0 9 145.0 46.0 19.0 145 163.0 314.0 140.0 64.0 4.0 8.0 199.0 207 van
823 105 54.0 105.0 213.0 67.0 10 200.0 33.0 23.0 163 214.0 597.0 214.0 68.0 10.0 20.0 190.0 198 car
824 106 55.0 96.0 196.0 60.0 12 221.0 30.0 25.0 173 225.0 717.0 214.0 72.0 9.0 13.0 186.0 196 car
825 86 39.0 84.0 149.0 57.0 8 156.0 43.0 20.0 133 185.0 358.0 157.0 74.0 0.0 23.0 183.0 190 car
826 95 49.0 92.0 193.0 62.0 10 178.0 37.0 21.0 154 200.0 478.0 171.0 64.0 2.0 0.0 198.0 206 car
827 99 57.0 100.0 177.0 54.0 13 224.0 30.0 25.0 188 223.0 726.0 213.0 72.0 4.0 7.0 185.0 198 car
828 89 42.0 66.0 125.0 53.0 7 131.0 51.0 18.0 144 162.0 254.0 162.0 73.0 10.0 17.0 188.0 191 van
829 95 49.0 82.0 139.0 56.0 11 159.0 43.0 20.0 162 173.0 365.0 185.0 75.0 7.0 10.0 182.0 191 van
830 97 37.0 70.0 173.0 66.0 7 151.0 43.0 19.0 129 167.0 346.0 119.0 65.0 0.0 16.0 201.0 208 bus
831 100 47.0 70.0 185.0 70.0 7 162.0 40.0 20.0 153 179.0 406.0 172.0 68.0 9.0 6.0 200.0 205 bus
832 108 49.0 109.0 204.0 61.0 11 212.0 31.0 24.0 159 229.0 665.0 215.0 71.0 16.0 11.0 190.0 199 car
833 92 46.0 83.0 154.0 56.0 6 160.0 41.0 20.0 148 185.0 382.0 184.0 71.0 10.0 5.0 186.0 191 car
834 82 36.0 51.0 114.0 53.0 4 135.0 50.0 18.0 126 150.0 268.0 144.0 86.0 15.0 4.0 181.0 182 car
835 111 58.0 105.0 183.0 51.0 6 265.0 26.0 29.0 174 285.0 1018.0 255.0 85.0 4.0 8.0 181.0 183 bus
836 87 45.0 66.0 139.0 58.0 8 140.0 47.0 18.0 148 168.0 294.0 175.0 73.0 3.0 12.0 188.0 196 van
837 94 46.0 77.0 169.0 60.0 8 158.0 42.0 20.0 148 181.0 373.0 181.0 67.0 12.0 2.0 193.0 199 car
838 95 43.0 76.0 142.0 57.0 10 151.0 44.0 19.0 149 173.0 339.0 159.0 71.0 2.0 23.0 187.0 200 van
839 90 44.0 72.0 157.0 64.0 8 137.0 48.0 18.0 144 159.0 283.0 171.0 65.0 9.0 4.0 196.0 203 van
840 93 34.0 66.0 140.0 56.0 7 130.0 51.0 18.0 120 151.0 251.0 114.0 62.0 5.0 29.0 201.0 207 car
841 93 39.0 87.0 183.0 64.0 8 169.0 40.0 20.0 134 200.0 422.0 149.0 72.0 7.0 25.0 188.0 195 car
842 89 46.0 84.0 163.0 66.0 11 159.0 43.0 20.0 159 173.0 368.0 176.0 72.0 1.0 20.0 186.0 197 van
843 106 54.0 101.0 222.0 67.0 12 222.0 30.0 25.0 173 228.0 721.0 200.0 70.0 3.0 4.0 187.0 201 car
844 86 36.0 78.0 146.0 58.0 7 135.0 50.0 18.0 124 155.0 270.0 148.0 66.0 0.0 25.0 190.0 195 car
845 85 36.0 66.0 123.0 55.0 5 120.0 56.0 17.0 128 140.0 212.0 131.0 73.0 1.0 18.0 186.0 190 van

846 rows × 19 columns

In [0]:
  vhcl.isnull().sum()
Out[0]:
compactness                    0
circularity                    5
distance_circularity           4
radius_ratio                   6
pr.axis_aspect_ratio           2
max.length_aspect_ratio        0
scatter_ratio                  1
elongatedness                  1
pr.axis_rectangularity         3
max.length_rectangularity      0
scaled_variance                3
scaled_variance.1              2
scaled_radius_of_gyration      2
scaled_radius_of_gyration.1    4
skewness_about                 6
skewness_about.1               1
skewness_about.2               1
hollows_ratio                  0
class                          0
dtype: int64
In [0]:
# Only three columns  in our dataset does-not contain null value apart from these three columns all the columns have the count of less than 846.
In [0]:
# Furhter verifying the same using the Describe function (). 

vhcl.describe().transpose()
Out[0]:
count mean std min 25% 50% 75% max
compactness 846.0 93.678487 8.234474 73.0 87.00 93.0 100.0 119.0
circularity 841.0 44.828775 6.152172 33.0 40.00 44.0 49.0 59.0
distance_circularity 842.0 82.110451 15.778292 40.0 70.00 80.0 98.0 112.0
radius_ratio 840.0 168.888095 33.520198 104.0 141.00 167.0 195.0 333.0
pr.axis_aspect_ratio 844.0 61.678910 7.891463 47.0 57.00 61.0 65.0 138.0
max.length_aspect_ratio 846.0 8.567376 4.601217 2.0 7.00 8.0 10.0 55.0
scatter_ratio 845.0 168.901775 33.214848 112.0 147.00 157.0 198.0 265.0
elongatedness 845.0 40.933728 7.816186 26.0 33.00 43.0 46.0 61.0
pr.axis_rectangularity 843.0 20.582444 2.592933 17.0 19.00 20.0 23.0 29.0
max.length_rectangularity 846.0 147.998818 14.515652 118.0 137.00 146.0 159.0 188.0
scaled_variance 843.0 188.631079 31.411004 130.0 167.00 179.0 217.0 320.0
scaled_variance.1 844.0 439.494076 176.666903 184.0 318.00 363.5 587.0 1018.0
scaled_radius_of_gyration 844.0 174.709716 32.584808 109.0 149.00 173.5 198.0 268.0
scaled_radius_of_gyration.1 842.0 72.447743 7.486190 59.0 67.00 71.5 75.0 135.0
skewness_about 840.0 6.364286 4.920649 0.0 2.00 6.0 9.0 22.0
skewness_about.1 845.0 12.602367 8.936081 0.0 5.00 11.0 19.0 41.0
skewness_about.2 845.0 188.919527 6.155809 176.0 184.00 188.0 193.0 206.0
hollows_ratio 846.0 195.632388 7.438797 181.0 190.25 197.0 201.0 211.0
In [0]:
vhcl.boxplot(figsize=(24,15))
Out[0]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f953a03cd30>
In [0]:
  vhcl_df = vhcl.drop('class',axis = 1)
  vhcl_target = vhcl.pop('class')
In [0]:
vhcl_df.isna().sum()
Out[0]:
compactness                    0
circularity                    5
distance_circularity           4
radius_ratio                   6
pr.axis_aspect_ratio           2
max.length_aspect_ratio        0
scatter_ratio                  1
elongatedness                  1
pr.axis_rectangularity         3
max.length_rectangularity      0
scaled_variance                3
scaled_variance.1              2
scaled_radius_of_gyration      2
scaled_radius_of_gyration.1    4
skewness_about                 6
skewness_about.1               1
skewness_about.2               1
hollows_ratio                  0
dtype: int64
In [0]:
vhcl_df.mean()
Out[0]:
compactness                     93.678487
circularity                     44.828775
distance_circularity            82.110451
radius_ratio                   168.888095
pr.axis_aspect_ratio            61.678910
max.length_aspect_ratio          8.567376
scatter_ratio                  168.901775
elongatedness                   40.933728
pr.axis_rectangularity          20.582444
max.length_rectangularity      147.998818
scaled_variance                188.631079
scaled_variance.1              439.494076
scaled_radius_of_gyration      174.709716
scaled_radius_of_gyration.1     72.447743
skewness_about                   6.364286
skewness_about.1                12.602367
skewness_about.2               188.919527
hollows_ratio                  195.632388
dtype: float64
In [0]:
vhcl_df.median()
Out[0]:
compactness                     93.0
circularity                     44.0
distance_circularity            80.0
radius_ratio                   167.0
pr.axis_aspect_ratio            61.0
max.length_aspect_ratio          8.0
scatter_ratio                  157.0
elongatedness                   43.0
pr.axis_rectangularity          20.0
max.length_rectangularity      146.0
scaled_variance                179.0
scaled_variance.1              363.5
scaled_radius_of_gyration      173.5
scaled_radius_of_gyration.1     71.5
skewness_about                   6.0
skewness_about.1                11.0
skewness_about.2               188.0
hollows_ratio                  197.0
dtype: float64
In [0]:
vhcl_df =vhcl_df.fillna(vhcl_df.median())

            
In [0]:
vhcl_df.isnull().sum()
Out[0]:
compactness                    0
circularity                    0
distance_circularity           0
radius_ratio                   0
pr.axis_aspect_ratio           0
max.length_aspect_ratio        0
scatter_ratio                  0
elongatedness                  0
pr.axis_rectangularity         0
max.length_rectangularity      0
scaled_variance                0
scaled_variance.1              0
scaled_radius_of_gyration      0
scaled_radius_of_gyration.1    0
skewness_about                 0
skewness_about.1               0
skewness_about.2               0
hollows_ratio                  0
dtype: int64
In [0]:
 
In [0]:
from scipy.stats import zscore
In [0]:
vhclz_df=vhcl_df.apply(zscore)
In [0]:
vhclz_df.head()
Out[0]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio
0 0.160580 0.518073 0.057177 0.273363 1.310398 0.311542 -0.207598 0.136262 -0.224342 0.758332 -0.401920 -0.341934 0.285705 -0.327326 -0.073812 0.380870 -0.312012 0.183957
1 -0.325470 -0.623732 0.120741 -0.835032 -0.593753 0.094079 -0.599423 0.520519 -0.610886 -0.344578 -0.593357 -0.619724 -0.513630 -0.059384 0.538390 0.156798 0.013265 0.452977
2 1.254193 0.844303 1.519141 1.202018 0.548738 0.311542 1.148719 -1.144597 0.935290 0.689401 1.097671 1.109379 1.392477 0.074587 1.558727 -0.403383 -0.149374 0.049447
3 -0.082445 -0.623732 -0.006386 -0.295813 0.167907 0.094079 -0.750125 0.648605 -0.610886 -0.344578 -0.912419 -0.738777 -1.466683 -1.265121 -0.073812 -0.291347 1.639649 1.529056
4 -1.054545 -0.134387 -0.769150 1.082192 5.245643 9.444962 -0.599423 0.520519 -0.610886 -0.275646 1.671982 -0.648070 0.408680 7.309005 0.538390 -0.179311 -1.450481 -1.699181
In [0]:
vhcl_df = vhclz_df.join(vhcl_target)
In [0]:
vhcl_df.head()
Out[0]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio class
0 0.160580 0.518073 0.057177 0.273363 1.310398 0.311542 -0.207598 0.136262 -0.224342 0.758332 -0.401920 -0.341934 0.285705 -0.327326 -0.073812 0.380870 -0.312012 0.183957 van
1 -0.325470 -0.623732 0.120741 -0.835032 -0.593753 0.094079 -0.599423 0.520519 -0.610886 -0.344578 -0.593357 -0.619724 -0.513630 -0.059384 0.538390 0.156798 0.013265 0.452977 van
2 1.254193 0.844303 1.519141 1.202018 0.548738 0.311542 1.148719 -1.144597 0.935290 0.689401 1.097671 1.109379 1.392477 0.074587 1.558727 -0.403383 -0.149374 0.049447 car
3 -0.082445 -0.623732 -0.006386 -0.295813 0.167907 0.094079 -0.750125 0.648605 -0.610886 -0.344578 -0.912419 -0.738777 -1.466683 -1.265121 -0.073812 -0.291347 1.639649 1.529056 van
4 -1.054545 -0.134387 -0.769150 1.082192 5.245643 9.444962 -0.599423 0.520519 -0.610886 -0.275646 1.671982 -0.648070 0.408680 7.309005 0.538390 -0.179311 -1.450481 -1.699181 bus
In [0]:
sns.pairplot(vhclz_df,diag_kind='kde')
Out[0]:
<seaborn.axisgrid.PairGrid at 0x7f9537657f28>
In [0]:
## IN most of the Data we see that there are 3 peaks but we also see that there are 3-4 peaks all around 
vhclz_df.corr()
Out[0]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio
compactness 1.000000 0.684887 0.789928 0.689743 0.091534 0.148249 0.812620 -0.788750 0.813694 0.676143 0.762070 0.814012 0.585243 -0.249593 0.236078 0.157015 0.298537 0.365552
circularity 0.684887 1.000000 0.792320 0.620912 0.153778 0.251467 0.847938 -0.821472 0.843400 0.961318 0.796306 0.835946 0.925816 0.051946 0.144198 -0.011439 -0.104426 0.046351
distance_circularity 0.789928 0.792320 1.000000 0.767035 0.158456 0.264686 0.905076 -0.911307 0.893025 0.774527 0.861519 0.886017 0.705771 -0.225944 0.113924 0.265547 0.146098 0.332732
radius_ratio 0.689743 0.620912 0.767035 1.000000 0.663447 0.450052 0.734429 -0.789481 0.708385 0.568949 0.793415 0.718436 0.536372 -0.180397 0.048713 0.173741 0.382214 0.471309
pr.axis_aspect_ratio 0.091534 0.153778 0.158456 0.663447 1.000000 0.648724 0.103732 -0.183035 0.079604 0.126909 0.272910 0.089189 0.121971 0.152950 -0.058371 -0.031976 0.239886 0.267725
max.length_aspect_ratio 0.148249 0.251467 0.264686 0.450052 0.648724 1.000000 0.166191 -0.180140 0.161502 0.305943 0.318957 0.143253 0.189743 0.295735 0.015599 0.043422 -0.026081 0.143919
scatter_ratio 0.812620 0.847938 0.905076 0.734429 0.103732 0.166191 1.000000 -0.971601 0.989751 0.809083 0.948662 0.993012 0.799875 -0.027542 0.074458 0.212428 0.005628 0.118817
elongatedness -0.788750 -0.821472 -0.911307 -0.789481 -0.183035 -0.180140 -0.971601 1.000000 -0.948996 -0.775854 -0.936382 -0.953816 -0.766314 0.103302 -0.052600 -0.185053 -0.115126 -0.216905
pr.axis_rectangularity 0.813694 0.843400 0.893025 0.708385 0.079604 0.161502 0.989751 -0.948996 1.000000 0.810934 0.934227 0.988213 0.796690 -0.015495 0.083767 0.214700 -0.018649 0.099286
max.length_rectangularity 0.676143 0.961318 0.774527 0.568949 0.126909 0.305943 0.809083 -0.775854 0.810934 1.000000 0.744985 0.794615 0.866450 0.041622 0.135852 0.001366 -0.103948 0.076770
scaled_variance 0.762070 0.796306 0.861519 0.793415 0.272910 0.318957 0.948662 -0.936382 0.934227 0.744985 1.000000 0.945678 0.778917 0.113078 0.036729 0.194239 0.014219 0.085695
scaled_variance.1 0.814012 0.835946 0.886017 0.718436 0.089189 0.143253 0.993012 -0.953816 0.988213 0.794615 0.945678 1.000000 0.795017 -0.015401 0.076877 0.200811 0.006219 0.102935
scaled_radius_of_gyration 0.585243 0.925816 0.705771 0.536372 0.121971 0.189743 0.799875 -0.766314 0.796690 0.866450 0.778917 0.795017 1.000000 0.191473 0.166483 -0.056153 -0.224450 -0.118002
scaled_radius_of_gyration.1 -0.249593 0.051946 -0.225944 -0.180397 0.152950 0.295735 -0.027542 0.103302 -0.015495 0.041622 0.113078 -0.015401 0.191473 1.000000 -0.088355 -0.126183 -0.748865 -0.802123
skewness_about 0.236078 0.144198 0.113924 0.048713 -0.058371 0.015599 0.074458 -0.052600 0.083767 0.135852 0.036729 0.076877 0.166483 -0.088355 1.000000 -0.034990 0.115297 0.097126
skewness_about.1 0.157015 -0.011439 0.265547 0.173741 -0.031976 0.043422 0.212428 -0.185053 0.214700 0.001366 0.194239 0.200811 -0.056153 -0.126183 -0.034990 1.000000 0.077310 0.204990
skewness_about.2 0.298537 -0.104426 0.146098 0.382214 0.239886 -0.026081 0.005628 -0.115126 -0.018649 -0.103948 0.014219 0.006219 -0.224450 -0.748865 0.115297 0.077310 1.000000 0.892581
hollows_ratio 0.365552 0.046351 0.332732 0.471309 0.267725 0.143919 0.118817 -0.216905 0.099286 0.076770 0.085695 0.102935 -0.118002 -0.802123 0.097126 0.204990 0.892581 1.000000
In [0]:
# Using corr we can check what all values are actually which shows high correlation. 
#Scattered ratio is very much realted with the scaled variance we see that from the realtion

from scipy.spatial.distance import cdist
clusters = range(1,10)
meanDistortions = []

for k in clusters:
    model= KMeans(n_clusters=k)
    model.fit(vhclz_df)
    prediction= model.predict(vhclz_df)
    meanDistortions.append(sum(np.min(cdist(vhclz_df,model.cluster_centers_,'euclidean'),axis =1))/vhclz_df.shape[0])
    
plt.plot(clusters,meanDistortions,'bx-')
plt.xlabel('k')
plt.ylabel('Average distortion')
plt.title('Selecting k with elbow method')
Out[0]:
Text(0.5, 1.0, 'Selecting k with elbow method')
In [0]:
#STarting with k = 2 
final_model = KMeans(2)
final_model.fit(vhclz_df)
prediction= final_model.predict(vhclz_df)
vhcl_df['GROUP']= prediction

print ("group assigned :\n")

vhcl_df[['hollows_ratio','class','GROUP']]
# Append the prediction in the main data frame  
group assigned :

Out[0]:
hollows_ratio class GROUP
0 0.183957 van 0
1 0.452977 van 0
2 0.049447 car 1
3 1.529056 van 0
4 -1.699181 bus 0
5 -1.699181 bus 1
6 1.125526 bus 0
7 0.856507 van 0
8 1.663566 van 0
9 1.125526 car 1
10 0.856507 van 0
11 0.856507 car 0
12 -0.085062 bus 0
13 0.183957 van 0
14 0.452977 bus 1
15 -0.219572 car 1
16 -1.430161 van 0
17 0.452977 bus 0
18 -0.085062 car 1
19 -0.354082 car 1
20 -0.488592 bus 0
21 -1.699181 van 0
22 1.394546 bus 0
23 0.452977 bus 0
24 0.721997 car 1
25 0.452977 van 0
26 -1.699181 car 0
27 0.452977 car 1
28 -0.354082 bus 1
29 -1.430161 car 0
... ... ... ...
816 -0.085062 van 0
817 0.991016 car 1
818 0.183957 car 1
819 0.183957 car 1
820 -1.295651 car 0
821 -0.085062 car 1
822 1.529056 van 0
823 0.318467 car 1
824 0.049447 car 1
825 -0.757612 car 0
826 1.394546 car 1
827 0.318467 car 1
828 -0.623102 van 0
829 -0.623102 van 0
830 1.663566 bus 0
831 1.260036 bus 0
832 0.452977 car 1
833 -0.623102 car 0
834 -1.833690 car 0
835 -1.699181 bus 1
836 0.049447 van 0
837 0.452977 car 0
838 0.587487 van 0
839 0.991016 van 0
840 1.529056 car 0
841 -0.085062 car 0
842 0.183957 van 0
843 0.721997 car 1
844 -0.085062 car 0
845 -0.757612 van 0

846 rows × 3 columns

In [0]:
## Now using the box plot to get more information about the data here , Let's prepare the box plot when the numner of groups we are seeing as 0 and 1. 

vhcl_df.boxplot(by = 'GROUP' , layout = (3,6) , figsize = (25,15))
Out[0]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7f9531862eb8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f95313adfd0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f9531309908>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952f872e48>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952f885e10>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952f81cdd8>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7f952f842da0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952f7ebda0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952f7ebdd8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952f7bfcf8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952f769cc0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952f792c88>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7f952f73ec50>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952f6ebc18>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952f712be0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952f6bdba8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952f669b70>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952f691b38>]],
      dtype=object)
In [0]:
## Here we see the distribution of the data for the group 0 and 1 and when the value is between -2 to 10 Now moving onto the different case 

# Now running the model for k = 3 and see what all are the values we get :-- 


# Moving with k = 2


vhcl_df.drop('GROUP',axis=1, inplace = True)
In [0]:
vhcl_df.head()
Out[0]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio class
0 0.160580 0.518073 0.057177 0.273363 1.310398 0.311542 -0.207598 0.136262 -0.224342 0.758332 -0.401920 -0.341934 0.285705 -0.327326 -0.073812 0.380870 -0.312012 0.183957 van
1 -0.325470 -0.623732 0.120741 -0.835032 -0.593753 0.094079 -0.599423 0.520519 -0.610886 -0.344578 -0.593357 -0.619724 -0.513630 -0.059384 0.538390 0.156798 0.013265 0.452977 van
2 1.254193 0.844303 1.519141 1.202018 0.548738 0.311542 1.148719 -1.144597 0.935290 0.689401 1.097671 1.109379 1.392477 0.074587 1.558727 -0.403383 -0.149374 0.049447 car
3 -0.082445 -0.623732 -0.006386 -0.295813 0.167907 0.094079 -0.750125 0.648605 -0.610886 -0.344578 -0.912419 -0.738777 -1.466683 -1.265121 -0.073812 -0.291347 1.639649 1.529056 van
4 -1.054545 -0.134387 -0.769150 1.082192 5.245643 9.444962 -0.599423 0.520519 -0.610886 -0.275646 1.671982 -0.648070 0.408680 7.309005 0.538390 -0.179311 -1.450481 -1.699181 bus
In [0]:
final_model = KMeans(3)
final_model.fit(vhclz_df)
prediction= final_model.predict(vhclz_df)
vhcl_df['GROUP']= prediction

print ("group assigned :\n")

vhcl_df[['hollows_ratio','class','GROUP']]
# Append the prediction in the main data frame  
group assigned :

Out[0]:
hollows_ratio class GROUP
0 0.183957 van 2
1 0.452977 van 2
2 0.049447 car 1
3 1.529056 van 2
4 -1.699181 bus 0
5 -1.699181 bus 1
6 1.125526 bus 2
7 0.856507 van 2
8 1.663566 van 2
9 1.125526 car 2
10 0.856507 van 2
11 0.856507 car 2
12 -0.085062 bus 2
13 0.183957 van 2
14 0.452977 bus 2
15 -0.219572 car 1
16 -1.430161 van 0
17 0.452977 bus 2
18 -0.085062 car 1
19 -0.354082 car 1
20 -0.488592 bus 0
21 -1.699181 van 0
22 1.394546 bus 2
23 0.452977 bus 2
24 0.721997 car 1
25 0.452977 van 2
26 -1.699181 car 0
27 0.452977 car 1
28 -0.354082 bus 1
29 -1.430161 car 0
... ... ... ...
816 -0.085062 van 2
817 0.991016 car 1
818 0.183957 car 1
819 0.183957 car 2
820 -1.295651 car 0
821 -0.085062 car 1
822 1.529056 van 2
823 0.318467 car 1
824 0.049447 car 1
825 -0.757612 car 0
826 1.394546 car 2
827 0.318467 car 1
828 -0.623102 van 0
829 -0.623102 van 0
830 1.663566 bus 2
831 1.260036 bus 2
832 0.452977 car 1
833 -0.623102 car 0
834 -1.833690 car 0
835 -1.699181 bus 1
836 0.049447 van 0
837 0.452977 car 2
838 0.587487 van 2
839 0.991016 van 2
840 1.529056 car 2
841 -0.085062 car 2
842 0.183957 van 2
843 0.721997 car 1
844 -0.085062 car 2
845 -0.757612 van 0

846 rows × 3 columns

In [0]:
vhcl_df.boxplot(by = 'GROUP' , layout = (3,6) , figsize = (25,15))
Out[0]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7f953169e198>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952ef33048>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952eebbfd0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952ee661d0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952ee8e4a8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952ee35780>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7f952ede0a58>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952ee09d68>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952ee09da0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952ed61320>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952ed8c5f8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952ed348d0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7f952ecdbba8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952ed05e80>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952ecb6198>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952ec5e470>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952ec88748>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952ec32a20>]],
      dtype=object)
In [0]:
## so here we see that based on some properties we are seeing that the 3 cluster are divided for every dimension .

# Let's move ahead and see it for k value ==4

vhcl_df.drop('GROUP',axis=1, inplace= True)
In [0]:
final_model = KMeans(4)
final_model.fit(vhclz_df)
prediction= final_model.predict(vhclz_df)
vhcl_df['GROUP']= prediction

print ("group assigned :\n")

vhcl_df[['hollows_ratio','class','GROUP']]
# Append the prediction in the main data frame  
group assigned :

Out[0]:
hollows_ratio class GROUP
0 0.183957 van 2
1 0.452977 van 2
2 0.049447 car 1
3 1.529056 van 2
4 -1.699181 bus 3
5 -1.699181 bus 1
6 1.125526 bus 2
7 0.856507 van 2
8 1.663566 van 2
9 1.125526 car 2
10 0.856507 van 2
11 0.856507 car 2
12 -0.085062 bus 2
13 0.183957 van 2
14 0.452977 bus 2
15 -0.219572 car 1
16 -1.430161 van 0
17 0.452977 bus 2
18 -0.085062 car 1
19 -0.354082 car 1
20 -0.488592 bus 0
21 -1.699181 van 0
22 1.394546 bus 2
23 0.452977 bus 2
24 0.721997 car 1
25 0.452977 van 2
26 -1.699181 car 0
27 0.452977 car 1
28 -0.354082 bus 1
29 -1.430161 car 0
... ... ... ...
816 -0.085062 van 2
817 0.991016 car 1
818 0.183957 car 1
819 0.183957 car 2
820 -1.295651 car 0
821 -0.085062 car 1
822 1.529056 van 2
823 0.318467 car 1
824 0.049447 car 1
825 -0.757612 car 0
826 1.394546 car 2
827 0.318467 car 1
828 -0.623102 van 0
829 -0.623102 van 0
830 1.663566 bus 2
831 1.260036 bus 2
832 0.452977 car 1
833 -0.623102 car 2
834 -1.833690 car 0
835 -1.699181 bus 1
836 0.049447 van 0
837 0.452977 car 2
838 0.587487 van 2
839 0.991016 van 2
840 1.529056 car 2
841 -0.085062 car 2
842 0.183957 van 2
843 0.721997 car 1
844 -0.085062 car 2
845 -0.757612 van 0

846 rows × 3 columns

In [0]:
vhcl_df.boxplot(by = 'GROUP' , layout = (3,6) , figsize = (25,15))
Out[0]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7f952e40bfd0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952e379828>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952e339978>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952e2aec50>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952e2d7f28>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952e288240>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7f952e230518>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952e25a828>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952e25a860>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952e1aada0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952e15d0b8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952e185390>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7f952e12d668>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952e156940>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952e100c18>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952e0a8ef0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952e0da208>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f952e0824e0>]],
      dtype=object)
In [0]:
# Usinfg the Value of k =4 Still makes our data not being nicely visualized in box plot rather we see in some cases there the value falling in the same
# range for all the 4 classes SO will go with k = 3value. 
In [0]:
#Moving to PCA 

Moving to PCA

In [0]:
# IN PCA we will use Eigen values and Eigen vectors and based on the data covered by these we will skip the columns which are of less magnitude when it comes to
## have data captured 
## ALthough we will be dropping the principal component in this case rather than the columns themselves

## and then testing the accuracy for the same
vhclz_df.head()
Out[0]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio
0 0.160580 0.518073 0.057177 0.273363 1.310398 0.311542 -0.207598 0.136262 -0.224342 0.758332 -0.401920 -0.341934 0.285705 -0.327326 -0.073812 0.380870 -0.312012 0.183957
1 -0.325470 -0.623732 0.120741 -0.835032 -0.593753 0.094079 -0.599423 0.520519 -0.610886 -0.344578 -0.593357 -0.619724 -0.513630 -0.059384 0.538390 0.156798 0.013265 0.452977
2 1.254193 0.844303 1.519141 1.202018 0.548738 0.311542 1.148719 -1.144597 0.935290 0.689401 1.097671 1.109379 1.392477 0.074587 1.558727 -0.403383 -0.149374 0.049447
3 -0.082445 -0.623732 -0.006386 -0.295813 0.167907 0.094079 -0.750125 0.648605 -0.610886 -0.344578 -0.912419 -0.738777 -1.466683 -1.265121 -0.073812 -0.291347 1.639649 1.529056
4 -1.054545 -0.134387 -0.769150 1.082192 5.245643 9.444962 -0.599423 0.520519 -0.610886 -0.275646 1.671982 -0.648070 0.408680 7.309005 0.538390 -0.179311 -1.450481 -1.699181
In [0]:
X_z = vhclz_df
In [0]:
cov_matrix = np.cov(X_z.T)
print('Covariance matrix \n%s',cov_matrix)
Covariance matrix 
%s [[ 1.00118343  0.68569786  0.79086299  0.69055952  0.09164265  0.14842463
   0.81358214 -0.78968322  0.81465658  0.67694334  0.76297234  0.81497566
   0.58593517 -0.24988794  0.23635777  0.15720044  0.29889034  0.36598446]
 [ 0.68569786  1.00118343  0.79325751  0.6216467   0.15396023  0.25176438
   0.8489411  -0.82244387  0.84439802  0.96245572  0.79724837  0.83693508
   0.92691166  0.05200785  0.14436828 -0.01145212 -0.10455005  0.04640562]
 [ 0.79086299  0.79325751  1.00118343  0.76794246  0.15864319  0.26499957
   0.90614687 -0.9123854   0.89408198  0.77544391  0.86253904  0.88706577
   0.70660663 -0.22621115  0.1140589   0.26586088  0.14627113  0.33312625]
 [ 0.69055952  0.6216467   0.76794246  1.00118343  0.66423242  0.45058426
   0.73529816 -0.79041561  0.70922371  0.56962256  0.79435372  0.71928618
   0.53700678 -0.18061084  0.04877032  0.17394649  0.38266622  0.47186659]
 [ 0.09164265  0.15396023  0.15864319  0.66423242  1.00118343  0.64949139
   0.10385472 -0.18325156  0.07969786  0.1270594   0.27323306  0.08929427
   0.12211524  0.15313091 -0.05843967 -0.0320139   0.24016968  0.26804208]
 [ 0.14842463  0.25176438  0.26499957  0.45058426  0.64949139  1.00118343
   0.16638787 -0.18035326  0.16169312  0.30630475  0.31933428  0.1434227
   0.18996732  0.29608463  0.01561769  0.04347324 -0.02611148  0.14408905]
 [ 0.81358214  0.8489411   0.90614687  0.73529816  0.10385472  0.16638787
   1.00118343 -0.97275069  0.99092181  0.81004084  0.94978498  0.9941867
   0.80082111 -0.02757446  0.07454578  0.21267959  0.00563439  0.1189581 ]
 [-0.78968322 -0.82244387 -0.9123854  -0.79041561 -0.18325156 -0.18035326
  -0.97275069  1.00118343 -0.95011894 -0.77677186 -0.93748998 -0.95494487
  -0.76722075  0.10342428 -0.05266193 -0.18527244 -0.11526213 -0.2171615 ]
 [ 0.81465658  0.84439802  0.89408198  0.70922371  0.07969786  0.16169312
   0.99092181 -0.95011894  1.00118343  0.81189327  0.93533261  0.98938264
   0.79763248 -0.01551372  0.08386628  0.21495454 -0.01867064  0.09940372]
 [ 0.67694334  0.96245572  0.77544391  0.56962256  0.1270594   0.30630475
   0.81004084 -0.77677186  0.81189327  1.00118343  0.74586628  0.79555492
   0.86747579  0.04167099  0.13601231  0.00136727 -0.10407076  0.07686047]
 [ 0.76297234  0.79724837  0.86253904  0.79435372  0.27323306  0.31933428
   0.94978498 -0.93748998  0.93533261  0.74586628  1.00118343  0.94679667
   0.77983844  0.11321163  0.03677248  0.19446837  0.01423606  0.08579656]
 [ 0.81497566  0.83693508  0.88706577  0.71928618  0.08929427  0.1434227
   0.9941867  -0.95494487  0.98938264  0.79555492  0.94679667  1.00118343
   0.79595778 -0.01541878  0.07696823  0.20104818  0.00622636  0.10305714]
 [ 0.58593517  0.92691166  0.70660663  0.53700678  0.12211524  0.18996732
   0.80082111 -0.76722075  0.79763248  0.86747579  0.77983844  0.79595778
   1.00118343  0.19169941  0.16667971 -0.05621953 -0.22471583 -0.11814142]
 [-0.24988794  0.05200785 -0.22621115 -0.18061084  0.15313091  0.29608463
  -0.02757446  0.10342428 -0.01551372  0.04167099  0.11321163 -0.01541878
   0.19169941  1.00118343 -0.08846001 -0.12633227 -0.749751   -0.80307227]
 [ 0.23635777  0.14436828  0.1140589   0.04877032 -0.05843967  0.01561769
   0.07454578 -0.05266193  0.08386628  0.13601231  0.03677248  0.07696823
   0.16667971 -0.08846001  1.00118343 -0.03503155  0.1154338   0.09724079]
 [ 0.15720044 -0.01145212  0.26586088  0.17394649 -0.0320139   0.04347324
   0.21267959 -0.18527244  0.21495454  0.00136727  0.19446837  0.20104818
  -0.05621953 -0.12633227 -0.03503155  1.00118343  0.07740174  0.20523257]
 [ 0.29889034 -0.10455005  0.14627113  0.38266622  0.24016968 -0.02611148
   0.00563439 -0.11526213 -0.01867064 -0.10407076  0.01423606  0.00622636
  -0.22471583 -0.749751    0.1154338   0.07740174  1.00118343  0.89363767]
 [ 0.36598446  0.04640562  0.33312625  0.47186659  0.26804208  0.14408905
   0.1189581  -0.2171615   0.09940372  0.07686047  0.08579656  0.10305714
  -0.11814142 -0.80307227  0.09724079  0.20523257  0.89363767  1.00118343]]
In [0]:
eigenvalues,eigenvectors = np.linalg.eig(cov_matrix)

print("\nEigen vectors \n%s ",eigenvectors)
print("\nEigen Values \n%s",eigenvalues)
Eigen vectors 
%s  [[ 2.75283688e-01  1.26953763e-01  1.19922479e-01 -7.83843562e-02
  -6.95178336e-02  1.44875476e-01  4.51862331e-01  5.66136785e-01
   4.84418105e-01  2.60076393e-01 -4.65342885e-02  1.20344026e-02
  -1.56136836e-01 -1.00728764e-02 -6.00532537e-03  6.00485194e-02
  -6.50956666e-02 -9.67780251e-03]
 [ 2.93258469e-01 -1.25576727e-01  2.48205467e-02 -1.87337408e-01
   8.50649539e-02 -3.02731148e-01 -2.49103387e-01  1.79851809e-01
   1.41569001e-02 -9.80779086e-02 -3.01323693e-03 -2.13635088e-01
  -1.50116709e-02 -9.15939674e-03  7.38059396e-02 -4.26993118e-01
  -2.61244802e-01 -5.97862837e-01]
 [ 3.04609128e-01  7.29516436e-02  5.60143254e-02  7.12008427e-02
  -4.06645651e-02 -1.38405773e-01  7.40350569e-02 -4.34748988e-01
   1.67572478e-01  2.05031597e-01 -7.06489498e-01  3.46330345e-04
   2.37111452e-01  6.94599696e-03 -2.50791236e-02  1.46240270e-01
   7.82651714e-02 -1.57257142e-01]
 [ 2.67606877e-01  1.89634378e-01 -2.75074211e-01  4.26053415e-02
   4.61473714e-02  2.48136636e-01 -1.76912814e-01 -1.01998360e-01
   2.30313563e-01  4.77888949e-02  1.07151583e-01 -1.57049977e-01
   3.07818692e-02 -4.20156482e-02 -3.59880417e-02 -5.21374718e-01
   5.60792139e-01  1.66551725e-01]
 [ 8.05039890e-02  1.22174860e-01 -6.42012966e-01 -3.27257119e-02
   4.05494487e-02  2.36932611e-01 -3.97876601e-01  6.87147927e-02
   2.77128307e-01 -1.08075009e-01 -3.85169721e-02  1.10106595e-01
   3.92804479e-02  3.12698087e-02  1.25847434e-02  3.63120360e-01
  -3.22276873e-01 -6.36138719e-02]
 [ 9.72756855e-02 -1.07482875e-02 -5.91801304e-01 -3.14147277e-02
  -2.13432566e-01 -4.19330747e-01  5.03413610e-01 -1.61153097e-01
  -1.48032250e-01  1.18266345e-01  2.62254132e-01 -1.32935328e-01
  -3.72884301e-02 -9.99915816e-03 -2.84168792e-02  6.27796802e-02
   4.87809642e-02 -8.63169844e-02]
 [ 3.17092750e-01 -4.81181371e-02  9.76283108e-02  9.57485748e-02
   1.54853055e-02  1.16100153e-01  6.49879382e-02 -1.00688056e-01
  -5.44574214e-02 -1.65167200e-01  1.70405800e-01  9.55883216e-02
  -3.94638419e-02  8.40975659e-01 -2.49652703e-01  6.40502241e-02
   1.81839668e-02 -7.98693109e-02]
 [-3.14133155e-01 -1.27498515e-02 -5.76484384e-02 -8.22901952e-02
  -7.68518712e-02 -1.41840112e-01  1.38112945e-02  2.15497166e-01
   1.56867362e-01  1.51612333e-01  5.76632611e-02  1.22012715e-01
   8.10394855e-01  2.38188639e-01 -4.21478467e-02 -1.86946145e-01
  -2.50330194e-02  4.21515054e-02]
 [ 3.13959064e-01 -5.99352482e-02  1.09512416e-01  9.24582989e-02
  -2.17633157e-03  9.80561329e-02  9.66573058e-02 -6.35933915e-02
  -5.24978759e-03 -1.93777917e-01  2.72514033e-01  2.51281206e-01
   2.71573184e-01 -1.01154594e-01  7.17396292e-01  1.80912790e-01
   1.64490784e-01 -1.44490635e-01]
 [ 2.82830900e-01 -1.16220532e-01  1.70641987e-02 -1.88005612e-01
   6.06366845e-02 -4.61674972e-01 -1.04552173e-01  2.49495867e-01
   6.10362445e-02 -4.69059999e-01 -1.41434233e-01 -1.24529334e-01
   7.57105808e-02 -1.69481636e-02 -4.70233017e-02  1.74070296e-01
   1.47280090e-01  5.11259153e-01]
 [ 3.09280359e-01 -6.22806229e-02 -5.63239801e-02  1.19844008e-01
   4.56472367e-04  2.36225434e-01  1.14622578e-01 -5.02096319e-02
  -2.97588112e-01  1.29986011e-01 -7.72596638e-02 -2.15011644e-01
   1.53180808e-01  6.04665108e-03  1.71503771e-01 -2.77272123e-01
  -5.64444637e-01  4.53236855e-01]
 [ 3.13788457e-01 -5.37843596e-02  1.08840729e-01  9.17449325e-02
   1.95548315e-02  1.57820194e-01  8.37350220e-02 -4.37649907e-02
  -8.33669838e-02 -1.58203940e-01  2.43226301e-01  1.75685051e-01
   3.07948154e-01 -4.69202757e-01 -6.16589383e-01  7.85141734e-02
  -6.85856929e-02 -1.26992250e-01]
 [ 2.72047492e-01 -2.09233172e-01  3.14636493e-02 -2.00095228e-01
   6.15991681e-02 -1.35576278e-01 -3.73944382e-01  1.08474496e-01
  -2.41655483e-01  6.86493700e-01  1.58888394e-01  1.90336498e-01
  -3.76087492e-02  1.17483082e-02 -2.64910290e-02  2.00683948e-01
   1.47099233e-01  1.09982525e-01]
 [-2.08137692e-02 -4.88525148e-01 -2.86277015e-01  6.55051354e-02
  -1.45530146e-01  2.41356821e-01  1.11952983e-01  3.40878491e-01
  -3.20221887e-01 -1.27648385e-01 -4.19188664e-01  2.85710601e-01
  -4.34650674e-02  3.14812146e-03 -1.42959461e-02 -1.46861607e-01
   2.32941262e-01 -1.11271959e-01]
 [ 4.14555082e-02  5.50899716e-02  1.15679354e-01 -6.04794251e-01
  -7.29189842e-01  2.03209257e-01 -8.06328902e-02 -1.56487670e-01
  -2.21054148e-02 -9.83643219e-02  1.25447648e-02 -1.60327156e-03
  -9.94304634e-03 -3.03156233e-03  1.74310271e-03 -1.73360301e-02
  -2.77589170e-02  2.40943096e-02]
 [ 5.82250207e-02  1.24085090e-01  7.52828901e-02  6.66114117e-01
  -5.99196401e-01 -1.91960802e-01 -2.84558723e-01  2.08774083e-01
  -1.01761758e-02  3.55150608e-02  3.27808069e-02 -8.32589542e-02
  -2.68915150e-02 -1.25315953e-02 -7.08894692e-03  3.13689218e-02
   2.78187408e-03 -9.89651885e-03]
 [ 3.02795063e-02  5.40914775e-01 -8.73592034e-03 -1.05526253e-01
   1.00602332e-01  1.56939174e-01  1.81451818e-02  3.04580219e-01
  -5.17222779e-01 -1.93956186e-02 -1.20597635e-01 -3.53723696e-01
   1.86595152e-01  4.34282436e-02  7.67874680e-03  2.31451048e-01
   1.90629960e-01 -1.82212045e-01]
 [ 7.41453913e-02  5.40354258e-01 -3.95242743e-02 -4.74890311e-02
   2.98614819e-02 -2.41222817e-01  1.57237839e-02  3.04186304e-02
  -1.71506343e-01 -6.41314778e-02 -9.19597847e-02  6.85618161e-01
  -1.42380007e-01 -6.47700819e-03  6.37681817e-03 -2.88502234e-01
  -1.20966490e-01  9.04014702e-02]]

Eigen Values 
%s [9.40460261e+00 3.01492206e+00 1.90352502e+00 1.17993747e+00
 9.17260633e-01 5.39992629e-01 3.58870118e-01 2.21932456e-01
 1.60608597e-01 9.18572234e-02 6.64994118e-02 4.66005994e-02
 3.57947189e-02 2.96445743e-03 1.00257898e-02 2.74120657e-02
 1.79166314e-02 2.05792871e-02]
In [0]:
eig_pairs  = [(eigenvalues[index],eigenvectors[:,index]) for index in range (len(eigenvalues))]
eig_pairs.sort()
eig_pairs.reverse()
print(eig_pairs)

## HEre we created a list called eig_pairs where we have various values within for eigen values and eigen vectors following that for that particular value

## and that is now sorted and reversed the eigen value and the eigen vectors tuploe with in our list.
[(9.404602609088712, array([ 0.27528369,  0.29325847,  0.30460913,  0.26760688,  0.08050399,
        0.09727569,  0.31709275, -0.31413315,  0.31395906,  0.2828309 ,
        0.30928036,  0.31378846,  0.27204749, -0.02081377,  0.04145551,
        0.05822502,  0.03027951,  0.07414539])), (3.0149220585246312, array([ 0.12695376, -0.12557673,  0.07295164,  0.18963438,  0.12217486,
       -0.01074829, -0.04811814, -0.01274985, -0.05993525, -0.11622053,
       -0.06228062, -0.05378436, -0.20923317, -0.48852515,  0.05508997,
        0.12408509,  0.54091477,  0.54035426])), (1.9035250218389657, array([ 0.11992248,  0.02482055,  0.05601433, -0.27507421, -0.64201297,
       -0.5918013 ,  0.09762831, -0.05764844,  0.10951242,  0.0170642 ,
       -0.05632398,  0.10884073,  0.03146365, -0.28627701,  0.11567935,
        0.07528289, -0.00873592, -0.03952427])), (1.1799374684450206, array([-0.07838436, -0.18733741,  0.07120084,  0.04260534, -0.03272571,
       -0.03141473,  0.09574857, -0.0822902 ,  0.0924583 , -0.18800561,
        0.11984401,  0.09174493, -0.20009523,  0.06550514, -0.60479425,
        0.66611412, -0.10552625, -0.04748903])), (0.9172606328594378, array([-6.95178336e-02,  8.50649539e-02, -4.06645651e-02,  4.61473714e-02,
        4.05494487e-02, -2.13432566e-01,  1.54853055e-02, -7.68518712e-02,
       -2.17633157e-03,  6.06366845e-02,  4.56472367e-04,  1.95548315e-02,
        6.15991681e-02, -1.45530146e-01, -7.29189842e-01, -5.99196401e-01,
        1.00602332e-01,  2.98614819e-02])), (0.5399926288001129, array([ 0.14487548, -0.30273115, -0.13840577,  0.24813664,  0.23693261,
       -0.41933075,  0.11610015, -0.14184011,  0.09805613, -0.46167497,
        0.23622543,  0.15782019, -0.13557628,  0.24135682,  0.20320926,
       -0.1919608 ,  0.15693917, -0.24122282])), (0.35887011792939744, array([ 0.45186233, -0.24910339,  0.07403506, -0.17691281, -0.3978766 ,
        0.50341361,  0.06498794,  0.01381129,  0.09665731, -0.10455217,
        0.11462258,  0.08373502, -0.37394438,  0.11195298, -0.08063289,
       -0.28455872,  0.01814518,  0.01572378])), (0.22193245599893402, array([ 0.56613679,  0.17985181, -0.43474899, -0.10199836,  0.06871479,
       -0.1611531 , -0.10068806,  0.21549717, -0.06359339,  0.24949587,
       -0.05020963, -0.04376499,  0.1084745 ,  0.34087849, -0.15648767,
        0.20877408,  0.30458022,  0.03041863])), (0.16060859663511767, array([ 0.4844181 ,  0.0141569 ,  0.16757248,  0.23031356,  0.27712831,
       -0.14803225, -0.05445742,  0.15686736, -0.00524979,  0.06103624,
       -0.29758811, -0.08336698, -0.24165548, -0.32022189, -0.02210541,
       -0.01017618, -0.51722278, -0.17150634])), (0.09185722339516159, array([ 0.26007639, -0.09807791,  0.2050316 ,  0.04778889, -0.10807501,
        0.11826635, -0.1651672 ,  0.15161233, -0.19377792, -0.46906   ,
        0.12998601, -0.15820394,  0.6864937 , -0.12764838, -0.09836432,
        0.03551506, -0.01939562, -0.06413148])), (0.06649941176460192, array([-0.04653429, -0.00301324, -0.7064895 ,  0.10715158, -0.03851697,
        0.26225413,  0.1704058 ,  0.05766326,  0.27251403, -0.14143423,
       -0.07725966,  0.2432263 ,  0.15888839, -0.41918866,  0.01254476,
        0.03278081, -0.12059763, -0.09195978])), (0.04660059944187703, array([ 1.20344026e-02, -2.13635088e-01,  3.46330345e-04, -1.57049977e-01,
        1.10106595e-01, -1.32935328e-01,  9.55883216e-02,  1.22012715e-01,
        2.51281206e-01, -1.24529334e-01, -2.15011644e-01,  1.75685051e-01,
        1.90336498e-01,  2.85710601e-01, -1.60327156e-03, -8.32589542e-02,
       -3.53723696e-01,  6.85618161e-01])), (0.03579471891303863, array([-0.15613684, -0.01501167,  0.23711145,  0.03078187,  0.03928045,
       -0.03728843, -0.03946384,  0.81039486,  0.27157318,  0.07571058,
        0.15318081,  0.30794815, -0.03760875, -0.04346507, -0.00994305,
       -0.02689151,  0.18659515, -0.14238001])), (0.02741206573719489, array([ 0.06004852, -0.42699312,  0.14624027, -0.52137472,  0.36312036,
        0.06277968,  0.06405022, -0.18694615,  0.18091279,  0.1740703 ,
       -0.27727212,  0.07851417,  0.20068395, -0.14686161, -0.01733603,
        0.03136892,  0.23145105, -0.28850223])), (0.020579287070888228, array([-0.0096778 , -0.59786284, -0.15725714,  0.16655173, -0.06361387,
       -0.08631698, -0.07986931,  0.04215151, -0.14449063,  0.51125915,
        0.45323685, -0.12699225,  0.10998252, -0.11127196,  0.02409431,
       -0.00989652, -0.18221204,  0.09040147])), (0.01791663143223666, array([-0.06509567, -0.2612448 ,  0.07826517,  0.56079214, -0.32227687,
        0.04878096,  0.01818397, -0.02503302,  0.16449078,  0.14728009,
       -0.56444464, -0.06858569,  0.14709923,  0.23294126, -0.02775892,
        0.00278187,  0.19062996, -0.12096649])), (0.010025789847556175, array([-0.00600533,  0.07380594, -0.02507912, -0.03598804,  0.01258474,
       -0.02841688, -0.2496527 , -0.04214785,  0.71739629, -0.0470233 ,
        0.17150377, -0.61658938, -0.02649103, -0.01429595,  0.0017431 ,
       -0.00708895,  0.00767875,  0.00637682])), (0.0029644574250446325, array([-0.01007288, -0.0091594 ,  0.006946  , -0.04201565,  0.03126981,
       -0.00999916,  0.84097566,  0.23818864, -0.10115459, -0.01694816,
        0.00604665, -0.46920276,  0.01174831,  0.00314812, -0.00303156,
       -0.0125316 ,  0.04342824, -0.00647701]))]
In [0]:
## Extract the descending order eien values and the eigen vectors 

eigenvalues_sorted = [eig_pairs[index][0] for index in range(len(eigenvalues))]
eigenvectors_sorted = [eig_pairs[index][1] for index in range(len(eigenvalues))]



print('\n Eigenvalues in descending order \n %s' %eigenvalues_sorted)
 Eigenvalues in descending order 
 [9.404602609088712, 3.0149220585246312, 1.9035250218389657, 1.1799374684450206, 0.9172606328594378, 0.5399926288001129, 0.35887011792939744, 0.22193245599893402, 0.16060859663511767, 0.09185722339516159, 0.06649941176460192, 0.04660059944187703, 0.03579471891303863, 0.02741206573719489, 0.020579287070888228, 0.01791663143223666, 0.010025789847556175, 0.0029644574250446325]
In [0]:
tot = sum(eigenvalues)
var_explained = [(i/tot) for i in sorted(eigenvalues, reverse=True)]
cum_var_exp = np.cumsum(var_explained)
In [0]:
plt.bar(range(1,19),var_explained,alpha=0.5, align= 'center',label='individual explained variance')
plt.step(range(1,19),cum_var_exp,where= 'mid',label = 'cumulative explained variance')
plt.ylabel('Explained Variance ratio')
plt.xlabel('Principal components')
plt.legend(loc='best')
plt.show()
In [0]:
## Here we see that with in the 8 - 10 Principal components we are reaching the mark of BOVE 90 % . 

##cl_reduce = np.array(eigenvectors_sorted[0:10]) ## HEre we are reducing from 18 to 10 dimension 
In [0]:
from sklearn.preprocessing import StandardScaler
In [0]:
sc = StandardScaler()
In [0]:
 
In [0]:
vhcls=pd.read_csv('vehicle_pca.csv')
In [0]:
vhcls_df = vhcls.drop('class',axis = 1)
vhcls_target = vhcls.pop('class')
In [0]:
vhcls_df.head()
Out[0]:
compactness circularity distance_circularity radius_ratio pr.axis_aspect_ratio max.length_aspect_ratio scatter_ratio elongatedness pr.axis_rectangularity max.length_rectangularity scaled_variance scaled_variance.1 scaled_radius_of_gyration scaled_radius_of_gyration.1 skewness_about skewness_about.1 skewness_about.2 hollows_ratio
0 95 48.0 83.0 178.0 72.0 10 162.0 42.0 20.0 159 176.0 379.0 184.0 70.0 6.0 16.0 187.0 197
1 91 41.0 84.0 141.0 57.0 9 149.0 45.0 19.0 143 170.0 330.0 158.0 72.0 9.0 14.0 189.0 199
2 104 50.0 106.0 209.0 66.0 10 207.0 32.0 23.0 158 223.0 635.0 220.0 73.0 14.0 9.0 188.0 196
3 93 41.0 82.0 159.0 63.0 9 144.0 46.0 19.0 143 160.0 309.0 127.0 63.0 6.0 10.0 199.0 207
4 85 44.0 70.0 205.0 103.0 52 149.0 45.0 19.0 144 241.0 325.0 188.0 127.0 9.0 11.0 180.0 183
In [0]:
vhcl_std = sc.fit_transform(vhcls_df)
In [0]:
 
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-53-9b0e498dfde7> in <module>()
----> 1 vhcl_std.head()

AttributeError: 'numpy.ndarray' object has no attribute 'head'
In [54]:
vhcl_std
Out[54]:
array([[ 0.16058035,  0.515771  ,  0.05641152, ...,  0.38044029,
        -0.31200827,  0.18395733],
       [-0.32546965, -0.62271564,  0.1198274 , ...,  0.15649599,
         0.01308049,  0.45297703],
       [ 1.25419283,  0.84105289,  1.5149769 , ..., -0.40336477,
        -0.14946389,  0.04944748],
       ...,
       [ 1.49721783,  1.49161668,  1.19789747, ..., -0.96322552,
        -0.31200827,  0.72199673],
       [-0.93303214, -1.43592038, -0.26066791, ...,  1.38818965,
         0.17562488, -0.08506238],
       [-1.05454464, -1.43592038, -1.02165854, ...,  0.60438459,
        -0.47455265, -0.75761164]])
In [0]:
vhcl_reduce = np.array(eigenvectors_sorted[0:10])

vhcl_std_10d = np.dot(vhcl_std , vhcl_reduce.T)

project_vhcl= pd.DataFrame(vhcl_std_10d)
In [56]:
project_vhcl = project_vhcl.fillna(project_vhcl.median())

project_vhcl.isnull().sum()
Out[56]:
0    0
1    0
2    0
3    0
4    0
5    0
6    0
7    0
8    0
9    0
dtype: int64
In [57]:
sns.pairplot(project_vhcl,diag_kind = 'kde')
Out[57]:
<seaborn.axisgrid.PairGrid at 0x7f9531a528d0>
In [0]:
#Final Moving towards model building :-- 

from sklearn import model_selection
test_size = 30
X_train, X_test, y_train, y_test = model_selection.train_test_split(project_vhcl, vhcl_target, test_size = 0.3, random_state = 45)
  
In [0]:
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC

model = SVC()
In [60]:
model.fit(X_train, y_train)
/usr/local/lib/python3.6/dist-packages/sklearn/svm/base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.
  "avoid this warning.", FutureWarning)
Out[60]:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='rbf', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)
In [0]:
vhcl_prediction = model.predict(X_test)
In [62]:
print(classification_report(y_test,vhcl_prediction))
print(confusion_matrix(y_test, vhcl_prediction))

## WE see that it gives a good accuracy score
              precision    recall  f1-score   support

         bus       0.99      0.90      0.94        77
         car       0.96      0.90      0.93       122
         van       0.79      1.00      0.88        55

    accuracy                           0.92       254
   macro avg       0.91      0.93      0.92       254
weighted avg       0.93      0.92      0.92       254

[[ 69   4   4]
 [  1 110  11]
 [  0   0  55]]
In [0]:
param_grid = {'C':[1,10,100,1000],'gamma':[1,0.1,0.001,0.0001], 'kernel':['linear','rbf']}
In [0]:
from sklearn.model_selection import GridSearchCV
grid = GridSearchCV(SVC(),param_grid,refit = True, verbose=2,cv=5)
In [0]:
from sklearn.model_selection import GridSearchCV
In [67]:
grid.fit(X_train,y_train)
Fitting 5 folds for each of 32 candidates, totalling 160 fits
[CV] C=1, gamma=1, kernel=linear .....................................
[CV] ...................... C=1, gamma=1, kernel=linear, total=   0.0s
[CV] C=1, gamma=1, kernel=linear .....................................
[CV] ...................... C=1, gamma=1, kernel=linear, total=   0.0s
[CV] C=1, gamma=1, kernel=linear .....................................
[CV] ...................... C=1, gamma=1, kernel=linear, total=   0.0s
[CV] C=1, gamma=1, kernel=linear .....................................
[CV] ...................... C=1, gamma=1, kernel=linear, total=   0.0s
[CV] C=1, gamma=1, kernel=linear .....................................
[CV] ...................... C=1, gamma=1, kernel=linear, total=   0.0s
[CV] C=1, gamma=1, kernel=rbf ........................................
[CV] ......................... C=1, gamma=1, kernel=rbf, total=   0.0s
[CV] C=1, gamma=1, kernel=rbf ........................................
[CV] ......................... C=1, gamma=1, kernel=rbf, total=   0.0s
[CV] C=1, gamma=1, kernel=rbf ........................................
[CV] ......................... C=1, gamma=1, kernel=rbf, total=   0.0s
[CV] C=1, gamma=1, kernel=rbf ........................................
[CV] ......................... C=1, gamma=1, kernel=rbf, total=   0.0s
[CV] C=1, gamma=1, kernel=rbf ........................................
[CV] ......................... C=1, gamma=1, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.1, kernel=linear ...................................
[CV] .................... C=1, gamma=0.1, kernel=linear, total=   0.0s
[CV] C=1, gamma=0.1, kernel=linear ...................................
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[CV] .................... C=1, gamma=0.1, kernel=linear, total=   0.0s
[CV] C=1, gamma=0.1, kernel=linear ...................................
[CV] .................... C=1, gamma=0.1, kernel=linear, total=   0.0s
[CV] C=1, gamma=0.1, kernel=linear ...................................
[CV] .................... C=1, gamma=0.1, kernel=linear, total=   0.0s
[CV] C=1, gamma=0.1, kernel=linear ...................................
[CV] .................... C=1, gamma=0.1, kernel=linear, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] ....................... C=1, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] ....................... C=1, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] ....................... C=1, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] ....................... C=1, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] ....................... C=1, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.001, kernel=linear .................................
[CV] .................. C=1, gamma=0.001, kernel=linear, total=   0.0s
[CV] C=1, gamma=0.001, kernel=linear .................................
[CV] .................. C=1, gamma=0.001, kernel=linear, total=   0.0s
[CV] C=1, gamma=0.001, kernel=linear .................................
[CV] .................. C=1, gamma=0.001, kernel=linear, total=   0.0s
[CV] C=1, gamma=0.001, kernel=linear .................................
[CV] .................. C=1, gamma=0.001, kernel=linear, total=   0.0s
[CV] C=1, gamma=0.001, kernel=linear .................................
[CV] .................. C=1, gamma=0.001, kernel=linear, total=   0.0s
[CV] C=1, gamma=0.001, kernel=rbf ....................................
[CV] ..................... C=1, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.001, kernel=rbf ....................................
[CV] ..................... C=1, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.001, kernel=rbf ....................................
[CV] ..................... C=1, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.001, kernel=rbf ....................................
[CV] ..................... C=1, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.001, kernel=rbf ....................................
[CV] ..................... C=1, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.0001, kernel=linear ................................
[CV] ................. C=1, gamma=0.0001, kernel=linear, total=   0.0s
[CV] C=1, gamma=0.0001, kernel=linear ................................
[CV] ................. C=1, gamma=0.0001, kernel=linear, total=   0.0s
[CV] C=1, gamma=0.0001, kernel=linear ................................
[CV] ................. C=1, gamma=0.0001, kernel=linear, total=   0.0s
[CV] C=1, gamma=0.0001, kernel=linear ................................
[CV] ................. C=1, gamma=0.0001, kernel=linear, total=   0.0s
[CV] C=1, gamma=0.0001, kernel=linear ................................
[CV] ................. C=1, gamma=0.0001, kernel=linear, total=   0.0s
[CV] C=1, gamma=0.0001, kernel=rbf ...................................
[CV] .................... C=1, gamma=0.0001, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.0001, kernel=rbf ...................................
[CV] .................... C=1, gamma=0.0001, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.0001, kernel=rbf ...................................
[CV] .................... C=1, gamma=0.0001, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.0001, kernel=rbf ...................................
[CV] .................... C=1, gamma=0.0001, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.0001, kernel=rbf ...................................
[CV] .................... C=1, gamma=0.0001, kernel=rbf, total=   0.0s
[CV] C=10, gamma=1, kernel=linear ....................................
[CV] ..................... C=10, gamma=1, kernel=linear, total=   0.0s
[CV] C=10, gamma=1, kernel=linear ....................................
[CV] ..................... C=10, gamma=1, kernel=linear, total=   0.0s
[CV] C=10, gamma=1, kernel=linear ....................................
[CV] ..................... C=10, gamma=1, kernel=linear, total=   0.0s
[CV] C=10, gamma=1, kernel=linear ....................................
[CV] ..................... C=10, gamma=1, kernel=linear, total=   0.0s
[CV] C=10, gamma=1, kernel=linear ....................................
[CV] ..................... C=10, gamma=1, kernel=linear, total=   0.0s
[CV] C=10, gamma=1, kernel=rbf .......................................
[CV] ........................ C=10, gamma=1, kernel=rbf, total=   0.0s
[CV] C=10, gamma=1, kernel=rbf .......................................
[CV] ........................ C=10, gamma=1, kernel=rbf, total=   0.0s
[CV] C=10, gamma=1, kernel=rbf .......................................
[CV] ........................ C=10, gamma=1, kernel=rbf, total=   0.0s
[CV] C=10, gamma=1, kernel=rbf .......................................
[CV] ........................ C=10, gamma=1, kernel=rbf, total=   0.0s
[CV] C=10, gamma=1, kernel=rbf .......................................
[CV] ........................ C=10, gamma=1, kernel=rbf, total=   0.0s
[CV] C=10, gamma=0.1, kernel=linear ..................................
[CV] ................... C=10, gamma=0.1, kernel=linear, total=   0.0s
[CV] C=10, gamma=0.1, kernel=linear ..................................
[CV] ................... C=10, gamma=0.1, kernel=linear, total=   0.0s
[CV] C=10, gamma=0.1, kernel=linear ..................................
[CV] ................... C=10, gamma=0.1, kernel=linear, total=   0.0s
[CV] C=10, gamma=0.1, kernel=linear ..................................
[CV] ................... C=10, gamma=0.1, kernel=linear, total=   0.0s
[CV] C=10, gamma=0.1, kernel=linear ..................................
[CV] ................... C=10, gamma=0.1, kernel=linear, total=   0.0s
[CV] C=10, gamma=0.1, kernel=rbf .....................................
[CV] ...................... C=10, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=10, gamma=0.1, kernel=rbf .....................................
[CV] ...................... C=10, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=10, gamma=0.1, kernel=rbf .....................................
[CV] ...................... C=10, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=10, gamma=0.1, kernel=rbf .....................................
[CV] ...................... C=10, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=10, gamma=0.1, kernel=rbf .....................................
[CV] ...................... C=10, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=10, gamma=0.001, kernel=linear ................................
[CV] ................. C=10, gamma=0.001, kernel=linear, total=   0.0s
[CV] C=10, gamma=0.001, kernel=linear ................................
[CV] ................. C=10, gamma=0.001, kernel=linear, total=   0.0s
[CV] C=10, gamma=0.001, kernel=linear ................................
[CV] ................. C=10, gamma=0.001, kernel=linear, total=   0.0s
[CV] C=10, gamma=0.001, kernel=linear ................................
[CV] ................. C=10, gamma=0.001, kernel=linear, total=   0.0s
[CV] C=10, gamma=0.001, kernel=linear ................................
[CV] ................. C=10, gamma=0.001, kernel=linear, total=   0.0s
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV] .................... C=10, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV] .................... C=10, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV] .................... C=10, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV] .................... C=10, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV] .................... C=10, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=10, gamma=0.0001, kernel=linear ...............................
[CV] ................ C=10, gamma=0.0001, kernel=linear, total=   0.0s
[CV] C=10, gamma=0.0001, kernel=linear ...............................
[CV] ................ C=10, gamma=0.0001, kernel=linear, total=   0.0s
[CV] C=10, gamma=0.0001, kernel=linear ...............................
[CV] ................ C=10, gamma=0.0001, kernel=linear, total=   0.0s
[CV] C=10, gamma=0.0001, kernel=linear ...............................
[CV] ................ C=10, gamma=0.0001, kernel=linear, total=   0.0s
[CV] C=10, gamma=0.0001, kernel=linear ...............................
[CV] ................ C=10, gamma=0.0001, kernel=linear, total=   0.0s
[CV] C=10, gamma=0.0001, kernel=rbf ..................................
[CV] ................... C=10, gamma=0.0001, kernel=rbf, total=   0.0s
[CV] C=10, gamma=0.0001, kernel=rbf ..................................
[CV] ................... C=10, gamma=0.0001, kernel=rbf, total=   0.0s
[CV] C=10, gamma=0.0001, kernel=rbf ..................................
[CV] ................... C=10, gamma=0.0001, kernel=rbf, total=   0.0s
[CV] C=10, gamma=0.0001, kernel=rbf ..................................
[CV] ................... C=10, gamma=0.0001, kernel=rbf, total=   0.0s
[CV] C=10, gamma=0.0001, kernel=rbf ..................................
[CV] ................... C=10, gamma=0.0001, kernel=rbf, total=   0.0s
[CV] C=100, gamma=1, kernel=linear ...................................
[CV] .................... C=100, gamma=1, kernel=linear, total=   0.3s
[CV] C=100, gamma=1, kernel=linear ...................................
[CV] .................... C=100, gamma=1, kernel=linear, total=   0.3s
[CV] C=100, gamma=1, kernel=linear ...................................
[CV] .................... C=100, gamma=1, kernel=linear, total=   0.2s
[CV] C=100, gamma=1, kernel=linear ...................................
[CV] .................... C=100, gamma=1, kernel=linear, total=   0.2s
[CV] C=100, gamma=1, kernel=linear ...................................
[CV] .................... C=100, gamma=1, kernel=linear, total=   0.1s
[CV] C=100, gamma=1, kernel=rbf ......................................
[CV] ....................... C=100, gamma=1, kernel=rbf, total=   0.0s
[CV] C=100, gamma=1, kernel=rbf ......................................
[CV] ....................... C=100, gamma=1, kernel=rbf, total=   0.0s
[CV] C=100, gamma=1, kernel=rbf ......................................
[CV] ....................... C=100, gamma=1, kernel=rbf, total=   0.0s
[CV] C=100, gamma=1, kernel=rbf ......................................
[CV] ....................... C=100, gamma=1, kernel=rbf, total=   0.0s
[CV] C=100, gamma=1, kernel=rbf ......................................
[CV] ....................... C=100, gamma=1, kernel=rbf, total=   0.0s
[CV] C=100, gamma=0.1, kernel=linear .................................
[CV] .................. C=100, gamma=0.1, kernel=linear, total=   0.3s
[CV] C=100, gamma=0.1, kernel=linear .................................
[CV] .................. C=100, gamma=0.1, kernel=linear, total=   0.3s
[CV] C=100, gamma=0.1, kernel=linear .................................
[CV] .................. C=100, gamma=0.1, kernel=linear, total=   0.2s
[CV] C=100, gamma=0.1, kernel=linear .................................
[CV] .................. C=100, gamma=0.1, kernel=linear, total=   0.2s
[CV] C=100, gamma=0.1, kernel=linear .................................
[CV] .................. C=100, gamma=0.1, kernel=linear, total=   0.1s
[CV] C=100, gamma=0.1, kernel=rbf ....................................
[CV] ..................... C=100, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=100, gamma=0.1, kernel=rbf ....................................
[CV] ..................... C=100, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=100, gamma=0.1, kernel=rbf ....................................
[CV] ..................... C=100, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=100, gamma=0.1, kernel=rbf ....................................
[CV] ..................... C=100, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=100, gamma=0.1, kernel=rbf ....................................
[CV] ..................... C=100, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=100, gamma=0.001, kernel=linear ...............................
[CV] ................ C=100, gamma=0.001, kernel=linear, total=   0.3s
[CV] C=100, gamma=0.001, kernel=linear ...............................
[CV] ................ C=100, gamma=0.001, kernel=linear, total=   0.2s
[CV] C=100, gamma=0.001, kernel=linear ...............................
[CV] ................ C=100, gamma=0.001, kernel=linear, total=   0.2s
[CV] C=100, gamma=0.001, kernel=linear ...............................
[CV] ................ C=100, gamma=0.001, kernel=linear, total=   0.2s
[CV] C=100, gamma=0.001, kernel=linear ...............................
[CV] ................ C=100, gamma=0.001, kernel=linear, total=   0.1s
[CV] C=100, gamma=0.001, kernel=rbf ..................................
[CV] ................... C=100, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=100, gamma=0.001, kernel=rbf ..................................
[CV] ................... C=100, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=100, gamma=0.001, kernel=rbf ..................................
[CV] ................... C=100, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=100, gamma=0.001, kernel=rbf ..................................
[CV] ................... C=100, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=100, gamma=0.001, kernel=rbf ..................................
[CV] ................... C=100, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=100, gamma=0.0001, kernel=linear ..............................
[CV] ............... C=100, gamma=0.0001, kernel=linear, total=   0.3s
[CV] C=100, gamma=0.0001, kernel=linear ..............................
[CV] ............... C=100, gamma=0.0001, kernel=linear, total=   0.2s
[CV] C=100, gamma=0.0001, kernel=linear ..............................
[CV] ............... C=100, gamma=0.0001, kernel=linear, total=   0.2s
[CV] C=100, gamma=0.0001, kernel=linear ..............................
[CV] ............... C=100, gamma=0.0001, kernel=linear, total=   0.2s
[CV] C=100, gamma=0.0001, kernel=linear ..............................
[CV] ............... C=100, gamma=0.0001, kernel=linear, total=   0.1s
[CV] C=100, gamma=0.0001, kernel=rbf .................................
[CV] .................. C=100, gamma=0.0001, kernel=rbf, total=   0.0s
[CV] C=100, gamma=0.0001, kernel=rbf .................................
[CV] .................. C=100, gamma=0.0001, kernel=rbf, total=   0.0s
[CV] C=100, gamma=0.0001, kernel=rbf .................................
[CV] .................. C=100, gamma=0.0001, kernel=rbf, total=   0.0s
[CV] C=100, gamma=0.0001, kernel=rbf .................................
[CV] .................. C=100, gamma=0.0001, kernel=rbf, total=   0.0s
[CV] C=100, gamma=0.0001, kernel=rbf .................................
[CV] .................. C=100, gamma=0.0001, kernel=rbf, total=   0.0s
[CV] C=1000, gamma=1, kernel=linear ..................................
[CV] ................... C=1000, gamma=1, kernel=linear, total=   2.5s
[CV] C=1000, gamma=1, kernel=linear ..................................
[CV] ................... C=1000, gamma=1, kernel=linear, total=   1.7s
[CV] C=1000, gamma=1, kernel=linear ..................................
[CV] ................... C=1000, gamma=1, kernel=linear, total=   1.5s
[CV] C=1000, gamma=1, kernel=linear ..................................
[CV] ................... C=1000, gamma=1, kernel=linear, total=   1.6s
[CV] C=1000, gamma=1, kernel=linear ..................................
[CV] ................... C=1000, gamma=1, kernel=linear, total=   3.2s
[CV] C=1000, gamma=1, kernel=rbf .....................................
[CV] ...................... C=1000, gamma=1, kernel=rbf, total=   0.0s
[CV] C=1000, gamma=1, kernel=rbf .....................................
[CV] ...................... C=1000, gamma=1, kernel=rbf, total=   0.0s
[CV] C=1000, gamma=1, kernel=rbf .....................................
[CV] ...................... C=1000, gamma=1, kernel=rbf, total=   0.0s
[CV] C=1000, gamma=1, kernel=rbf .....................................
[CV] ...................... C=1000, gamma=1, kernel=rbf, total=   0.0s
[CV] C=1000, gamma=1, kernel=rbf .....................................
[CV] ...................... C=1000, gamma=1, kernel=rbf, total=   0.0s
[CV] C=1000, gamma=0.1, kernel=linear ................................
[CV] ................. C=1000, gamma=0.1, kernel=linear, total=   2.5s
[CV] C=1000, gamma=0.1, kernel=linear ................................
[CV] ................. C=1000, gamma=0.1, kernel=linear, total=   1.7s
[CV] C=1000, gamma=0.1, kernel=linear ................................
[CV] ................. C=1000, gamma=0.1, kernel=linear, total=   1.5s
[CV] C=1000, gamma=0.1, kernel=linear ................................
[CV] ................. C=1000, gamma=0.1, kernel=linear, total=   1.6s
[CV] C=1000, gamma=0.1, kernel=linear ................................
[CV] ................. C=1000, gamma=0.1, kernel=linear, total=   3.2s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] .................... C=1000, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] .................... C=1000, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] .................... C=1000, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] .................... C=1000, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] .................... C=1000, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=1000, gamma=0.001, kernel=linear ..............................
[CV] ............... C=1000, gamma=0.001, kernel=linear, total=   2.4s
[CV] C=1000, gamma=0.001, kernel=linear ..............................
[CV] ............... C=1000, gamma=0.001, kernel=linear, total=   1.7s
[CV] C=1000, gamma=0.001, kernel=linear ..............................
[CV] ............... C=1000, gamma=0.001, kernel=linear, total=   1.5s
[CV] C=1000, gamma=0.001, kernel=linear ..............................
[CV] ............... C=1000, gamma=0.001, kernel=linear, total=   1.6s
[CV] C=1000, gamma=0.001, kernel=linear ..............................
[CV] ............... C=1000, gamma=0.001, kernel=linear, total=   3.2s
[CV] C=1000, gamma=0.001, kernel=rbf .................................
[CV] .................. C=1000, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=1000, gamma=0.001, kernel=rbf .................................
[CV] .................. C=1000, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=1000, gamma=0.001, kernel=rbf .................................
[CV] .................. C=1000, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=1000, gamma=0.001, kernel=rbf .................................
[CV] .................. C=1000, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=1000, gamma=0.001, kernel=rbf .................................
[CV] .................. C=1000, gamma=0.001, kernel=rbf, total=   0.0s
[CV] C=1000, gamma=0.0001, kernel=linear .............................
[CV] .............. C=1000, gamma=0.0001, kernel=linear, total=   2.4s
[CV] C=1000, gamma=0.0001, kernel=linear .............................
[CV] .............. C=1000, gamma=0.0001, kernel=linear, total=   1.7s
[CV] C=1000, gamma=0.0001, kernel=linear .............................
[CV] .............. C=1000, gamma=0.0001, kernel=linear, total=   1.5s
[CV] C=1000, gamma=0.0001, kernel=linear .............................
[CV] .............. C=1000, gamma=0.0001, kernel=linear, total=   1.6s
[CV] C=1000, gamma=0.0001, kernel=linear .............................
[CV] .............. C=1000, gamma=0.0001, kernel=linear, total=   3.2s
[CV] C=1000, gamma=0.0001, kernel=rbf ................................
[CV] ................. C=1000, gamma=0.0001, kernel=rbf, total=   0.0s
[CV] C=1000, gamma=0.0001, kernel=rbf ................................
[CV] ................. C=1000, gamma=0.0001, kernel=rbf, total=   0.0s
[CV] C=1000, gamma=0.0001, kernel=rbf ................................
[CV] ................. C=1000, gamma=0.0001, kernel=rbf, total=   0.0s
[CV] C=1000, gamma=0.0001, kernel=rbf ................................
[CV] ................. C=1000, gamma=0.0001, kernel=rbf, total=   0.0s
[CV] C=1000, gamma=0.0001, kernel=rbf ................................
[CV] ................. C=1000, gamma=0.0001, kernel=rbf, total=   0.0s
[Parallel(n_jobs=1)]: Done 160 out of 160 | elapsed:   48.2s finished
Out[67]:
GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='warn', n_jobs=None,
             param_grid={'C': [1, 10, 100, 1000],
                         'gamma': [1, 0.1, 0.001, 0.0001],
                         'kernel': ['linear', 'rbf']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=2)
In [68]:
grid.best_params_
Out[68]:
{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}
In [0]:
grid_predict = grid.predict(X_test)
In [70]:
print(classification_report(y_test,grid_predict))
print(confusion_matrix(y_test,grid_predict))
              precision    recall  f1-score   support

         bus       0.99      0.90      0.94        77
         car       0.96      0.90      0.93       122
         van       0.79      1.00      0.88        55

    accuracy                           0.92       254
   macro avg       0.91      0.93      0.92       254
weighted avg       0.93      0.92      0.92       254

[[ 69   4   4]
 [  1 110  11]
 [  0   0  55]]
In [0]:
## HEre we see that we have reached a 91% of accuracy .